####### Compute students' grade score --------

data <- readRDS("intermediary_outputs/data_out_4.Rds")

# Elementary-school rows (high_school == 0) that nevertheless carry a grade in
# a high-school-only class; they are detected through a non-missing FIS grade.
problem_ef <- which(data$high_school == 0 & !is.na(data$FIS_grade))

# For those rows the science grade (CIEN) becomes the plain mean of the five
# high-school-only grades. The mean is NA as soon as one of the five is NA.
hs_only_cols <- c("FIS_grade", "QUI_grade", "BIO_grade", "SOC_grade",
                  "FIL_grade")
data$CIEN_grade[problem_ef] <- Reduce(
  `+`,
  lapply(hs_only_cols, function(col) data[[col]][problem_ef])
) / 5

# Then blank out every high-school-only grade still held by an
# elementary-school row, one subject at a time.
elementary <- data$high_school == 0
data$FIS_grade[which(elementary & !is.na(data$FIS_grade))] <- NA
data$QUI_grade[which(elementary & !is.na(data$QUI_grade))] <- NA
data$BIO_grade[which(elementary & !is.na(data$BIO_grade))] <- NA
data$FIL_grade[which(elementary & !is.na(data$FIL_grade))] <- NA
data$SOC_grade[which(elementary & !is.na(data$SOC_grade))] <- NA

# High-school rows (high_school == 1) that have a science (CIEN) grade.
# For these rows the five high-school-only subjects are filled with the mean
# of the six other subjects (LP, MAT, HIST, GEO, ING, ART).
# NOTE: the selection only tests CIEN_grade, so any existing value in the five
# target columns is overwritten for the selected rows.
problem_em <- which(data$high_school == 1 & !is.na(data$CIEN_grade))

# The replacement value is the same for all five subjects: compute it once.
# It is NA whenever one of the six source grades is NA.
other_subjects_avg_em <- (
  data$LP_grade[problem_em] + data$MAT_grade[problem_em] +
    data$HIST_grade[problem_em] + data$GEO_grade[problem_em] +
    data$ING_grade[problem_em] + data$ART_grade[problem_em]
) / 6

data$FIS_grade[problem_em] <- other_subjects_avg_em
data$BIO_grade[problem_em] <- other_subjects_avg_em
data$QUI_grade[problem_em] <- other_subjects_avg_em
data$FIL_grade[problem_em] <- other_subjects_avg_em
data$SOC_grade[problem_em] <- other_subjects_avg_em

# Standardised grades per subject, computed within class ----
# Only rows with flag_select_sample == 0, a non-missing id_interview and a
# non-missing grade in the subject enter the computation; every other row is
# left NA in the new column grade_<SUBJECT>_std_class.
var_list <- c("LP", "MAT", "HIST", "GEO", "EDFIS",
              "ING", "ART", "FIS", "QUI", "BIO",
              "FIL", "SOC", "CIEN")

for (vars in var_list) {
  raw_col <- paste0(vars, "_grade")
  std_col <- paste0("grade_", vars, "_std_class")

  # scale() centres and scales within each class_id group; as.vector() drops
  # the matrix shape and attributes it returns.
  data[
    flag_select_sample == 0 & !is.na(id_interview) & !is.na(get(raw_col)),
    (std_col) := as.vector(scale(get(raw_col))),
    by = "class_id"
  ]
}

# Data sets for the factor analysis that produces the grades score ------
# EM = high school (high_school == 1), EF = elementary (high_school == 0).
# Both keep only rows with flag_select_sample == 0 and a known id_student.
rows_em_sample <- data$high_school == 1 & data$flag_select_sample == 0 &
  !is.na(data$id_student)
rows_ef_sample <- data$high_school == 0 & data$flag_select_sample == 0 &
  !is.na(data$id_student)

# High school: student id followed by the eleven standardised grades.
data_grades_EM <- subset(
  data, rows_em_sample,
  select = c(id_student,
             grade_LP_std_class, grade_MAT_std_class, grade_HIST_std_class,
             grade_GEO_std_class, grade_ING_std_class, grade_ART_std_class,
             grade_FIS_std_class, grade_QUI_std_class, grade_BIO_std_class,
             grade_FIL_std_class, grade_SOC_std_class)
)

# Elementary: student id followed by the six shared grades plus science.
data_grades_EF <- subset(
  data, rows_ef_sample,
  select = c(id_student,
             grade_LP_std_class, grade_MAT_std_class, grade_HIST_std_class,
             grade_GEO_std_class, grade_ING_std_class, grade_ART_std_class,
             grade_CIEN_std_class)
)

# Same row selections, without the id column and with EDFIS added as the
# last column.
data_grades_EM_with_edfis <- subset(
  data, rows_em_sample,
  select = c(grade_LP_std_class, grade_MAT_std_class, grade_HIST_std_class,
             grade_GEO_std_class, grade_ING_std_class, grade_ART_std_class,
             grade_FIS_std_class, grade_QUI_std_class, grade_BIO_std_class,
             grade_FIL_std_class, grade_SOC_std_class,
             grade_EDFIS_std_class)
)

data_grades_EF_with_edfis <- subset(
  data, rows_ef_sample,
  select = c(grade_LP_std_class, grade_MAT_std_class, grade_HIST_std_class,
             grade_GEO_std_class, grade_ING_std_class, grade_ART_std_class,
             grade_CIEN_std_class,
             grade_EDFIS_std_class)
)

# One-factor factor analysis, fitted separately for elementary (EF) and
# high-school (EM) students. Column 1 (id_student) is dropped before fitting.
# NOTE(review): `fa` is assumed to be psych::fa, whose argument is spelled
# `nfactors`; the previous `nfactor` only worked through partial matching.
fa_ef <- fa(data_grades_EF[, 2:ncol(data_grades_EF)], nfactors = 1)
fa_em <- fa(data_grades_EM[, 2:ncol(data_grades_EM)], nfactors = 1)

# Rows of `data` that receive a score. The scores are written back by
# position, so each set of rows must have exactly one row per score; this
# holds when id_student is unique in `data`.
rows_score_em <- which(data$id_student %in% data_grades_EM$id_student)
rows_score_ef <- which(data$id_student %in% data_grades_EF$id_student)

# Fail loudly instead of silently recycling / misaligning the scores.
if (length(rows_score_em) != length(fa_em$scores) ||
    length(rows_score_ef) != length(fa_ef$scores)) {
  stop("Factor scores do not line up with the rows of `data` selected by ",
       "id_student; check for duplicated id_student values.",
       call. = FALSE)
}

# Numeric NA so the column is numeric even if no row receives a score.
data$grades_score <- NA_real_
data$grades_score[rows_score_em] <- fa_em$scores
data$grades_score[rows_score_ef] <- fa_ef$scores

## Supply of friends ----
# For every vertex (student) of every network in list_networks_strong, compute:
#   * sd_grades_score_fr_2: sd of the grades score of the student's network
#     neighbours ("friends"); NA when fewer than two friends have a score.
#   * friends_<k>_sd_fr_2: number of friends whose grades score lies in band k
#     around the student's own score.
#   * supply_<k>_sd_fr_2: same count over all rows of `data` whose id_student
#     is a vertex of the network. NOTE(review): this includes the student's
#     own row -- confirm that is intended.
#   * *_same_race_*: the same counts restricted to rows whose `negro_na`
#     equals the student's own `negro_na` vertex attribute.
# Bands k = 1, 2, 3 are +/- 0.5, +/- 1 and +/- 1.5 score units around the own
# score (total width 1, 2 and 3); the "_sd" suffix refers to these widths.
# A student without a grades score gets 0 for every count.
band_half_widths <- c(0.5, 1, 1.5)

# Output columns, in the order in which they are added to `data`.
stat_cols <- c(
  "sd_grades_score_fr_2",
  "supply_1_sd_fr_2", "supply_2_sd_fr_2", "supply_3_sd_fr_2",
  "friends_1_sd_fr_2", "friends_2_sd_fr_2", "friends_3_sd_fr_2",
  "supply_1_sd_same_race_fr_2", "supply_2_sd_same_race_fr_2",
  "supply_3_sd_same_race_fr_2",
  "friends_1_sd_same_race_fr_2", "friends_2_sd_same_race_fr_2",
  "friends_3_sd_same_race_fr_2"
)

# Results are collected in plain vectors and attached to `data` once at the
# end; assigning into `data` for every student copies the table each time.
friend_stats <- setNames(
  rep(list(rep(NA, nrow(data))), length(stat_cols)),
  stat_cols
)

for (j in seq_along(list_networks_strong)) {
  class_net <- list_networks_strong[[j]]

  # Vertex names are the student ids of the class.
  my_class_id <- as.numeric(network::network.vertex.names(class_net))
  class_negro_na <- network::get.vertex.attribute(
    class_net, attrname = "negro_na")
  in_class <- data$id_student %in% my_class_id

  for (i in seq_along(my_class_id)) {
    myid <- my_class_id[i]
    my_rows <- which(data$id_student == myid)
    my_score <- data$grades_score[my_rows]

    # Friends are the network neighbours of vertex i.
    myneigh <- get.neighborhood(class_net, v = i)
    my_friends_id <- my_class_id[myneigh]
    is_friend <- data$id_student %in% my_friends_id

    # NA where either side is missing; such rows are never counted.
    same_race <- data$negro_na == class_negro_na[i]

    # SD of friends' grades scores
    friend_stats$sd_grades_score_fr_2[my_rows] <-
      sd(data$grades_score[is_friend], na.rm = TRUE)

    for (k in seq_along(band_half_widths)) {
      half_width <- band_half_widths[k]
      in_band <- data$grades_score >= my_score - half_width &
        data$grades_score <= my_score + half_width

      # which() drops NA comparisons, so only definite matches are counted.
      friend_stats[[paste0("friends_", k, "_sd_fr_2")]][my_rows] <-
        length(which(is_friend & in_band))
      friend_stats[[paste0("supply_", k, "_sd_fr_2")]][my_rows] <-
        length(which(in_class & in_band))
      friend_stats[[paste0("supply_", k, "_sd_same_race_fr_2")]][my_rows] <-
        length(which(in_class & in_band & same_race))
      friend_stats[[paste0("friends_", k, "_sd_same_race_fr_2")]][my_rows] <-
        length(which(is_friend & in_band & same_race))
    }
  }
}

# Attach all result columns at once (`data` is a data.table at this point).
data[, (stat_cols) := friend_stats]


# Shares are the band counts divided by the number of friends (friends_2).
# A missing friends_2 already yields NA through the division; students with
# zero friends are set to NA explicitly.

# Share of friends within 1 sd from own score:
data$sh_friends_1_sd_grade <- replace(
  data$friends_1_sd_fr_2 / data$friends_2,
  which(data$friends_2 == 0),
  NA
)

# Share of friends within 2 sd from own score:
data$sh_friends_2_sd_grade <- replace(
  data$friends_2_sd_fr_2 / data$friends_2,
  which(data$friends_2 == 0),
  NA
)

# Share of friends within 3 sd from own score:
data$sh_friends_3_sd_grade <- replace(
  data$friends_3_sd_fr_2 / data$friends_2,
  which(data$friends_2 == 0),
  NA
)

# z-score of sh_nw_class over all rows (missing values ignored).
data$sh_nw_class_std <-
  (data$sh_nw_class - mean(data$sh_nw_class, na.rm = TRUE)) /
  sd(data$sh_nw_class, na.rm = TRUE)

# Leave-one-out class share: for each row, the sum of negro_na over the other
# rows of the same class_id divided by the number of other rows with a
# non-missing negro_na. It is NA when the row's own negro_na is missing and
# not finite when the class has a single non-missing row (division by zero).
# NOTE(review): presumably negro_na is a 0/1 indicator -- confirm.
# ungroup() keeps the grouping from leaking into later steps.
data <- data %>%
  group_by(class_id) %>%
  mutate(sh_nw_class_special = (sum(negro_na, na.rm = TRUE) - negro_na) /
           (sum(!is.na(negro_na)) - 1)) %>%
  ungroup()

# z-score of the leave-one-out share. The closing parenthesis used to sit
# after sd(), which computed x - mean / sd instead of (x - mean) / sd.
data$sh_nw_class_special_std <-
  (data$sh_nw_class_special - mean(data$sh_nw_class_special, na.rm = TRUE)) /
  sd(data$sh_nw_class_special, na.rm = TRUE)

## Number of students of each race within grades strata ------
# Columns n_<race>_<stratum>_4 hold, for every row, the number of students in
# the row's class with that race code and grades stratum (0 when there are
# none). Race codes as used below: 2 = black, 3 = brown, 1 = white.
race_codes <- c(black = 2, brown = 3, white = 1)
stratum_names <- c("first", "second", "third", "fourth")

# Create the twelve count columns, all starting at zero.
for (race_name in names(race_codes)) {
  for (stratum_name in stratum_names) {
    data[[paste0("n_", race_name, "_", stratum_name, "_4")]] <- 0
  }
}

# Grades stratum 0..3 from the cut points -0.5, 0 and 0.5 of grades_score
# (lower bound inclusive); NA when the score is missing.
data$grade_four <- NA
data$grade_four[which(data$grades_score < -0.5)] <- 0
data$grade_four[which(data$grades_score >= -0.5 & data$grades_score < 0)] <- 1
data$grade_four[which(data$grades_score >= 0 & data$grades_score < 0.5)] <- 2
data$grade_four[which(data$grades_score >= 0.5)] <- 3

# Head count per class, race and stratum, for race codes below 4 only.
teste <- data %>%
  subset(race < 4) %>%
  group_by(class_id, race, grade_four) %>%
  summarise("Count" = n())

for (i in unique(teste$class_id)) {
  class_rows <- which(data$class_id == i)

  for (race_name in names(race_codes)) {
    for (s in seq_along(stratum_names)) {
      # At most one row of `teste` matches; grade_four is coded 0..3.
      cell <- which(teste$class_id == i &
                      teste$race == race_codes[[race_name]] &
                      teste$grade_four == s - 1)

      # max(c(0, ...)) turns "no matching row" into a count of zero.
      data[[paste0("n_", race_name, "_", stratum_names[s], "_4")]][
        class_rows
      ] <- max(c(0, teste$Count[cell]), na.rm = TRUE)
    }
  }
}

# Write the enriched data set to disk.
fwrite(data, "intermediary_outputs/base_with_connection_data.csv")

# Write a second copy in which one long column name is shortened
# (presumably to satisfy Stata's variable-name length limit -- confirm).
# setnames() renames in place, so `data` keeps the short name afterwards.
setnames(data,
         "avg_score_overall_violence_school_class",
         "avg_score_ov_viol_sc_class")

fwrite(data, "intermediary_outputs/base_with_connection_data_stata.csv")

